# Interactive COVID-19 Visualization of the United States
# Awesome summary
## generic imports
import altair as alt
import pandas as pd
from vega_datasets import data
import glob
from os import listdir,path
from pathlib import Path
# TopoJSON feature reference for the US county outlines shipped with
# vega_datasets (not referenced again in the visible part of this script).
counties = alt.topo_feature(data.us_10m.url, 'counties')
# used for world visualization later
#countries = alt.topo_feature(data.world_110m.url, 'countries')
source = data.unemployment.url

# The combined daily reports are large, so switch off Altair's max-rows
# check; see https://altair-viz.github.io/user_guide/faq.html
alt.data_transformers.disable_max_rows()

# Lookup transform reference:
# https://altair-viz.github.io/user_guide/transform/lookup.html

#### month selection
# Slider covering the twelve calendar months.
_month_slider = alt.binding_range(min=1, max=12, step=1)
# NOTE(review): the selection projects on the 'date' field while `init` is
# keyed by 'monthdate(date)' -- the two names do not match, so the initial
# value may not take effect. Confirm the intended field before wiring this
# selection into a chart.
month_select = alt.selection_single(
    name='select',
    fields=['date'],
    init={'monthdate(date)': 1},
    bind=_month_slider,
)

### To animate a choropleth map, start the chart from your own data frame and
### then use a lookup on the topology URL: Altair cannot dynamically resolve
### which data to animate and can only do it with pandas data frames.
directory_in_str = './COVID-19-master/csse_covid_19_data/csse_covid_19_daily_reports_us/'
def find_csv_filenames(path_to_dir, suffix) -> list:
    """Return the names of the entries in *path_to_dir* ending with *suffix*.

    Only bare names are returned (no directory component) and the scan is not
    recursive.  ``os.listdir`` yields entries in arbitrary order, so the
    result is sorted to keep downstream processing reproducible across runs
    and platforms.

    Args:
        path_to_dir: Directory to scan.
        suffix: Required ending of the entry name, e.g. ``"2021.csv"``.

    Returns:
        Alphabetically sorted list of matching names; empty when nothing
        matches.

    Raises:
        OSError: Propagated from ``os.listdir`` (for example when the
            directory does not exist).
    """
    return sorted(name for name in listdir(path_to_dir) if name.endswith(suffix))
# Five-way regional grouping of the 50 US states.  The District of Columbia
# and the US territories are not assigned to any region.
_STATES_BY_REGION = {
    'Midwest': (
        'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Michigan', 'Minnesota',
        'Missouri', 'Nebraska', 'North Dakota', 'Ohio', 'South Dakota',
        'Wisconsin',
    ),
    'Northeast': (
        'Connecticut', 'Delaware', 'Maine', 'Maryland', 'Massachusetts',
        'New Hampshire', 'New Jersey', 'New York', 'Pennsylvania',
        'Rhode Island', 'Vermont',
    ),
    'Southeast': (
        'Alabama', 'Arkansas', 'Florida', 'Georgia', 'Kentucky', 'Louisiana',
        'Mississippi', 'North Carolina', 'South Carolina', 'Tennessee',
        'Virginia', 'West Virginia',
    ),
    'Southwest': (
        'Arizona', 'New Mexico', 'Oklahoma', 'Texas',
    ),
    'West': (
        'Alaska', 'California', 'Colorado', 'Hawaii', 'Idaho', 'Montana',
        'Nevada', 'Oregon', 'Utah', 'Washington', 'Wyoming',
    ),
}

# Inverted once at import time so every lookup is a single dict access.
_REGION_BY_STATE = {
    state: region
    for region, states in _STATES_BY_REGION.items()
    for state in states
}


def fiveRegion(statename) -> str:
    """Map a US state name to one of five regions.

    The match is exact and case-sensitive (``'Ohio'`` matches, ``'ohio'`` does
    not).

    Args:
        statename: Full state name, e.g. ``'New York'``.

    Returns:
        ``'Midwest'``, ``'Northeast'``, ``'Southeast'``, ``'Southwest'`` or
        ``'West'`` for the 50 states; ``'Not a region'`` for anything else
        (District of Columbia, territories, unknown names, and non-string
        values such as ``None`` or NaN).
    """
    # Non-strings can never equal a state name; answering here also keeps
    # unhashable inputs from raising inside the dict lookup.
    if not isinstance(statename, str):
        return 'Not a region'
    return _REGION_BY_STATE.get(statename, 'Not a region')
# Load a single daily report from 'csse_covid_19_daily_reports_us' as a
# stand-alone sample frame.
filePath = './COVID-19-master/csse_covid_19_data/csse_covid_19_daily_reports_us/01-01-2021.csv'
# Derive the file name from filePath itself so the two can never drift apart.
filename = path.basename(filePath)
print(filename)
df = pd.read_csv(filePath)
# Tag every row with the report date, taken from the file name.
df["date"] = filename.replace('.csv', '')
# df.to_csv("{filename}.csv", index=False)
# Assign each row to a region ('Not a region' for non-state rows).
df['region'] = df['Province_State'].map(fiveRegion)
# Store FIPS codes as integers; like the original element-wise int()
# conversion, this raises if any FIPS value is missing.
df['FIPS'] = df['FIPS'].astype('int64')
# cleanup: shorter column names used by the charts
df = df.rename(columns={"Province_State": "State", "Country_Region": "Country"})
# Notebook-style inspection of the frame's dimensions (no effect in a script).
df.shape
#https://stackoverflow.com/questions/41857659/python-pandas-add-filename-column-csv
#https://stackoverflow.com/questions/9234560/find-all-csv-files-in-a-directory-using-python
def _load_daily_reports(csv_paths):
    """Read each daily-report CSV and tag its rows with the report date.

    The date comes from the file name, which is split on '-' and read as
    month, day, year (e.g. ``01-31-2021.csv``).  Five string columns are
    added to every frame: ``date`` (the file name without ``.csv``),
    ``month``, ``day``, ``year`` and ``ymd`` (``year-month-day``).

    Args:
        csv_paths: Iterable of paths to daily-report CSV files.

    Returns:
        List of data frames, one per path, in the order given.

    Raises:
        IndexError: If a file name has fewer than three '-'-separated parts.
    """
    frames = []
    for csv_path in csv_paths:
        frame = pd.read_csv(csv_path)
        report_date = path.basename(csv_path).replace('.csv', '')
        date_parts = report_date.split('-')
        frame['date'] = report_date
        frame['month'] = date_parts[0]
        frame['day'] = date_parts[1]
        frame['year'] = date_parts[2]
        # The date is constant within one file, so build the string once
        # instead of joining the three columns row by row.
        frame['ymd'] = '-'.join((date_parts[2], date_parts[0], date_parts[1]))
        # frame['ymd']=pd.to_datetime(frame['ymd'])
        frames.append(frame)
    return frames


# File names of the 2021 and 2020 daily reports ...
csvlist21 = find_csv_filenames(directory_in_str, "2021.csv")
csvlist20 = find_csv_filenames(directory_in_str, "2020.csv")
# ... and the same names prefixed with the reports directory.
csvlist21new = [path.join(directory_in_str, s) for s in csvlist21]
csvlist20new = [path.join(directory_in_str, s) for s in csvlist20]
# print(csvlist)

# pd.concat takes a list of dataframes as an argument, so keep one list per year.
data21 = _load_daily_reports(csvlist21new)
######### THE SAME FOR 2020
data20 = _load_daily_reports(csvlist20new)
def _combine_state_reports(frames):
    """Concatenate daily-report frames and keep only rows for the 50 states.

    Steps: concatenate, add a ``region`` column via ``fiveRegion``, drop rows
    whose region is ``'Not a region'``, convert ``FIPS`` to integers, and
    rename ``Province_State``/``Country_Region`` to ``State``/``Country``.

    Args:
        frames: List of data frames, each with ``Province_State`` and
            ``FIPS`` columns.

    Returns:
        A new data frame; the input frames are not modified.

    Raises:
        ValueError: From ``pd.concat`` when *frames* is empty, or from the
            integer conversion when a remaining row has no FIPS value.
    """
    # ignore_index: don't let pandas try to align the per-file row indexes.
    combined = pd.concat(frames, ignore_index=True)
    combined['region'] = combined['Province_State'].map(fiveRegion)
    # "Delete" the non-region rows by selecting everything else.  The explicit
    # copy makes the result an independent frame, so the FIPS assignment below
    # does not trigger pandas' SettingWithCopyWarning.
    combined = combined.loc[combined['region'] != 'Not a region'].copy()
    combined['FIPS'] = combined['FIPS'].astype('int64')
    # cleanup: shorter column names used by the charts
    return combined.rename(columns={"Province_State": "State", "Country_Region": "Country"})


# combine all csvs of one year into one frame (the date columns were already
# added to each respective file's frame)
bigframe21 = _combine_state_reports(data21)
###### THE SAME for 2020
bigframe20 = _combine_state_reports(data20)
# df.to_csv("total.csv", index=False)
# Fixed region -> colour assignment, listed in legend order (light to dark).
# Not referenced by the charts in the visible part of this script.
_region_palette = {
    'Midwest': '#edf8fb',
    'Northeast': '#b2e2e2',
    'Southeast': '#66c2a4',
    'Southwest': '#2ca25f',
    'West': '#006d2c',
}
colorBrewer = alt.Color(
    'region:N',
    scale=alt.Scale(
        domain=list(_region_palette),
        range=list(_region_palette.values()),
    ),
)

# Background layer: US state outlines in an Albers USA projection.
states = alt.topo_feature(data.us_10m.url, 'states')
_state_shapes = alt.Chart(states).mark_geoshape(
    fill='#e8e4f3',
    stroke='black',
    strokeWidth=1,
)
map1 = _state_shapes.project(type='albersUsa').properties(width=1000, height=650)
# map1

# Legend-bound single selection on the State field.
click = alt.selection_single(fields=['State'], bind='legend')
def _confirmed_cases_layer(frame, title):
    """Build the bubble layer showing confirmed cases per state.

    Each row of *frame* becomes a circle placed at its ``Long_``/``Lat``
    position, coloured by ``region`` and sized by ``Confirmed``; the tooltip
    shows ``State``, ``Confirmed`` and ``Deaths``.

    Args:
        frame: Data frame with the columns named above.
        title: Chart title.

    Returns:
        The Altair chart, sized 1000x650 to match the base map.
    """
    return alt.Chart(frame).mark_circle().transform_filter(
        # {'not': alt.FieldOneOfPredicate(field='Province_State', oneOf=["Diamond Princess", "Grand Princess", "Puerto Rico", "Guam", "Virgin Islands"])}
        {'not': alt.FieldEqualPredicate(field='region', equal="Not a region")}
    ).encode(
        longitude='Long_:Q',
        latitude='Lat:Q',
        color=alt.Color('region:N', scale=alt.Scale(scheme='viridis')),
        size=alt.Size(
            'Confirmed',
            # scale=alt.Scale(domain=[-1, 200], range=[10,400])
            scale=alt.Scale(range=[100, 3000]),
            legend=None,
        ),
        tooltip=['State:N', 'Confirmed:Q', 'Deaths:Q'],
        # opacity=alt.condition(click, alt.value(1), alt.value(0.3))
    ).properties(width=1000, height=650, title=title)


# The 2021 chart previously carried the copy-pasted title 'Confirmed Cases 2020'.
covid21 = _confirmed_cases_layer(bigframe21, 'Confirmed Cases 2021')
# full map
# testArea = alt.layer(areachart, lineChart).add_selection(month_select).transform_filter(month_select).resolve_scale(y='independent', color='independent').properties(width=600, height=600).interactive()
covid20 = _confirmed_cases_layer(bigframe20, 'Confirmed Cases 2020')

# Side-by-side layout: each year's bubbles layered over the state map.
# (Bare expression: rendered as cell output in a notebook.)
(map1 + covid21) | (map1 + covid20)